{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "\n",
    "os.chdir('C:\\\\Users\\\\Administrator\\\\Desktop\\\\JC数据集')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 整理不易，欢迎关注作者公众号：数据不吹牛\n",
    "## 场景一\n",
    "#### 导入成绩表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>姓名</th>\n",
       "      <th>科目</th>\n",
       "      <th>综合成绩</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>李华</td>\n",
       "      <td>一模</td>\n",
       "      <td>651</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>李华</td>\n",
       "      <td>二模</td>\n",
       "      <td>579</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>李华</td>\n",
       "      <td>三模</td>\n",
       "      <td>580</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>王雷</td>\n",
       "      <td>一模</td>\n",
       "      <td>475</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>王雷</td>\n",
       "      <td>二模</td>\n",
       "      <td>455</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>王雷</td>\n",
       "      <td>三模</td>\n",
       "      <td>432</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   姓名  科目  综合成绩\n",
       "0  李华  一模   651\n",
       "1  李华  二模   579\n",
       "2  李华  三模   580\n",
       "3  王雷  一模   475\n",
       "4  王雷  二模   455\n",
       "5  王雷  三模   432"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "score = pd.read_excel('apply案例数据.xlsx',sheetname = '成绩表')\n",
    "score.head(6)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 得到每位同学最高成绩"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>姓名</th>\n",
       "      <th>综合成绩</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>张建国</td>\n",
       "      <td>691</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>李华</td>\n",
       "      <td>651</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>李子明</td>\n",
       "      <td>577</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>王雷</td>\n",
       "      <td>475</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    姓名  综合成绩\n",
       "0  张建国   691\n",
       "1   李华   651\n",
       "2  李子明   577\n",
       "3   王雷   475"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "max_score = score.groupby('姓名')['综合成绩'].apply(max).reset_index()\n",
    "max_score"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 得到每位同学最低成绩"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>姓名</th>\n",
       "      <th>综合成绩</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>张建国</td>\n",
       "      <td>553</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>李华</td>\n",
       "      <td>579</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>李子明</td>\n",
       "      <td>490</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>王雷</td>\n",
       "      <td>432</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    姓名  综合成绩\n",
       "0  张建国   553\n",
       "1   李华   579\n",
       "2  李子明   490\n",
       "3   王雷   432"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "min_score = score.groupby('姓名')['综合成绩'].apply(min).reset_index()\n",
    "min_score"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 两表合并"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>姓名</th>\n",
       "      <th>最好成绩</th>\n",
       "      <th>最差成绩</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>张建国</td>\n",
       "      <td>691</td>\n",
       "      <td>553</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>李华</td>\n",
       "      <td>651</td>\n",
       "      <td>579</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>李子明</td>\n",
       "      <td>577</td>\n",
       "      <td>490</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>王雷</td>\n",
       "      <td>475</td>\n",
       "      <td>432</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    姓名  最好成绩  最差成绩\n",
       "0  张建国   691   553\n",
       "1   李华   651   579\n",
       "2  李子明   577   490\n",
       "3   王雷   475   432"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "score_combine = pd.merge(max_score,min_score,left_on = '姓名',right_on = '姓名',how = 'inner')\n",
    "score_combine.columns = ['姓名','最好成绩','最差成绩']\n",
    "score_combine"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 场景二"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>省份</th>\n",
       "      <th>城市</th>\n",
       "      <th>近1月销售额</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>重庆</td>\n",
       "      <td>重庆市</td>\n",
       "      <td>255343</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>浙江省</td>\n",
       "      <td>金华市</td>\n",
       "      <td>302624</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>浙江省</td>\n",
       "      <td>台州市</td>\n",
       "      <td>147853</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>浙江省</td>\n",
       "      <td>舟山市</td>\n",
       "      <td>136547</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>浙江省</td>\n",
       "      <td>杭州市</td>\n",
       "      <td>109073</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    省份   城市  近1月销售额\n",
       "0   重庆  重庆市  255343\n",
       "1  浙江省  金华市  302624\n",
       "2  浙江省  台州市  147853\n",
       "3  浙江省  舟山市  136547\n",
       "4  浙江省  杭州市  109073"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "order = pd.read_excel('apply案例数据.xlsx',sheetname = '省市销售数据')\n",
    "order.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 信息概览"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 210 entries, 0 to 209\n",
      "Data columns (total 3 columns):\n",
      "省份        210 non-null object\n",
      "城市        210 non-null object\n",
      "近1月销售额    210 non-null int64\n",
      "dtypes: int64(1), object(2)\n",
      "memory usage: 5.0+ KB\n"
     ]
    }
   ],
   "source": [
    "order.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 按省份销售额进行排序"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "order_rank = order.sort_values(['省份','近1月销售额'],ascending = False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 分组并传入指定列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pandas.core.groupby.DataFrameGroupBy object at 0x0000000007B5CA20>"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "order_rank.groupby('省份')[['城市','近1月销售额']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 定义筛选规则函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_third(x):\n",
    "    #如果分组长度小于等于1，意味着直辖市\n",
    "    if len(x) <= 1:\n",
    "        #返回第0行的所有值，即直辖市本身\n",
    "        return x.iloc[0,:]\n",
    "    \n",
    "    #针对非直辖市\n",
    "    else:\n",
    "        #直接返回第3行（排名第3，索引是2）所有值（城市，近1月销售额）\n",
    "        return x.iloc[2,:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### apply应用，返回结果"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>城市</th>\n",
       "      <th>近1月销售额</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>省份</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>上海</th>\n",
       "      <td>上海市</td>\n",
       "      <td>139261</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>云南省</th>\n",
       "      <td>昆明市</td>\n",
       "      <td>203210</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>内蒙古自治区</th>\n",
       "      <td>兴安盟</td>\n",
       "      <td>258106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>北京</th>\n",
       "      <td>北京市</td>\n",
       "      <td>154682</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>四川省</th>\n",
       "      <td>达州市</td>\n",
       "      <td>427285</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>天津</th>\n",
       "      <td>天津市</td>\n",
       "      <td>510720</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>安徽省</th>\n",
       "      <td>合肥市</td>\n",
       "      <td>179518</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>山东省</th>\n",
       "      <td>菏泽市</td>\n",
       "      <td>187375</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>山西省</th>\n",
       "      <td>临汾市</td>\n",
       "      <td>186931</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>广东省</th>\n",
       "      <td>肇庆市</td>\n",
       "      <td>321218</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>江苏省</th>\n",
       "      <td>镇江市</td>\n",
       "      <td>139625</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>江西省</th>\n",
       "      <td>南昌市</td>\n",
       "      <td>141011</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>河北省</th>\n",
       "      <td>邯郸市</td>\n",
       "      <td>127017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>河南省</th>\n",
       "      <td>焦作市</td>\n",
       "      <td>151812</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>浙江省</th>\n",
       "      <td>舟山市</td>\n",
       "      <td>136547</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>湖北省</th>\n",
       "      <td>襄阳市</td>\n",
       "      <td>194910</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>湖南省</th>\n",
       "      <td>长沙市</td>\n",
       "      <td>163263</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>甘肃省</th>\n",
       "      <td>天水市</td>\n",
       "      <td>119378</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>福建省</th>\n",
       "      <td>莆田市</td>\n",
       "      <td>209084</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>辽宁省</th>\n",
       "      <td>葫芦岛市</td>\n",
       "      <td>392363</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>重庆</th>\n",
       "      <td>重庆市</td>\n",
       "      <td>255343</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>陕西省</th>\n",
       "      <td>安康市</td>\n",
       "      <td>60456</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          城市  近1月销售额\n",
       "省份                  \n",
       "上海       上海市  139261\n",
       "云南省      昆明市  203210\n",
       "内蒙古自治区   兴安盟  258106\n",
       "北京       北京市  154682\n",
       "四川省      达州市  427285\n",
       "天津       天津市  510720\n",
       "安徽省      合肥市  179518\n",
       "山东省      菏泽市  187375\n",
       "山西省      临汾市  186931\n",
       "广东省      肇庆市  321218\n",
       "江苏省      镇江市  139625\n",
       "江西省      南昌市  141011\n",
       "河北省      邯郸市  127017\n",
       "河南省      焦作市  151812\n",
       "浙江省      舟山市  136547\n",
       "湖北省      襄阳市  194910\n",
       "湖南省      长沙市  163263\n",
       "甘肃省      天水市  119378\n",
       "福建省      莆田市  209084\n",
       "辽宁省     葫芦岛市  392363\n",
       "重庆       重庆市  255343\n",
       "陕西省      安康市   60456"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "order_rank.groupby('省份')[['城市','近1月销售额']].apply(get_third)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
